{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "3b46a078",
      "metadata": {
        "id": "3b46a078"
      },
      "source": [
        "Some of the package imports and functions will not be relevant for you, but I have left my full startup intact so you can look through it and use whatever you like.\n",
        "\n",
        "Because I don't know exactly what your data format looks like I have avoided including spaces for you to import specific files and instead have tried to provide instructions at the neccesary stages. The next two cells are just package imports and custom functions which you can ignore reading through for now"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "ceb35df1",
      "metadata": {
        "id": "ceb35df1"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import matplotlib as mpl\n",
        "import netCDF4 as nc\n",
        "import load_cube_module as lc\n",
        "import skimage\n",
        "import scipy as sp\n",
        "import skimage\n",
        "import os\n",
        "from skimage.transform import resize\n",
        "import tensorflow as tf\n",
        "from sklearn.model_selection import train_test_split, KFold\n",
        "import keras as kr\n",
        "from keras.models import Sequential\n",
        "from keras.layers.normalization import BatchNormalization\n",
        "from keras.layers.convolutional import Conv1D\n",
        "from keras.layers.convolutional import MaxPooling2D\n",
        "from keras.layers.core import Activation\n",
        "from keras.layers.core import Dropout\n",
        "from keras.layers.core import Dense\n",
        "from keras.layers import Flatten\n",
        "from keras.layers import Input\n",
        "from keras.models import Model\n",
        "from keras.layers import concatenate\n",
        "from keras.utils import to_categorical\n",
        "import os\n",
        "import random\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "#plt.style.use(\"ggplot\")\n",
        "#%matplotlib inline\n",
        "from tqdm import tqdm_notebook, tnrange\n",
        "from itertools import chain\n",
        "from skimage.io import imread, imshow, concatenate_images\n",
        "from skimage.transform import resize\n",
        "from skimage.morphology import label\n",
        "from sklearn.model_selection import train_test_split, KFold\n",
        "\n",
        "import tensorflow as tf\n",
        "\n",
        "from keras import *\n",
        "from keras.models import Model, load_model\n",
        "from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout\n",
        "from keras.layers.core import Lambda, RepeatVector, Reshape\n",
        "from keras.layers.convolutional import Conv2D, Conv2DTranspose\n",
        "from keras.layers.pooling import MaxPooling2D, GlobalMaxPool2D\n",
        "from keras.layers.merge import concatenate, add\n",
        "from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau\n",
        "from keras.optimizers import Adam\n",
        "from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img\n",
        "\n",
        "\n",
        "import matplotlib\n",
        "font = {'family' : 'normal',\n",
        "        'weight' : 'bold',\n",
        "        'size'   : 22}\n",
        "\n",
        "matplotlib.rc('font', **font)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "ae390501",
      "metadata": {
        "id": "ae390501"
      },
      "outputs": [],
      "source": [
        "from keras import backend as K\n",
        "smooth = 1e-12\n",
        "def jaccard_coef(y_true, y_pred):\n",
        "    intersection = K.sum(y_true * y_pred, axis=[0, -1, -2])\n",
        "    sum_ = K.sum(y_true + y_pred, axis=[0, -1, -2])\n",
        "\n",
        "    jac = (intersection + smooth) / (sum_ - intersection + smooth)\n",
        "\n",
        "    return K.mean(jac)\n",
        "\n",
        "\n",
        "def jaccard_coef_int(y_true, y_pred):\n",
        "    y_pred_pos = K.round(K.clip(y_pred, 0, 1))\n",
        "\n",
        "    intersection = K.sum(y_true * y_pred_pos, axis=[0, -1, -2])\n",
        "    sum_ = K.sum(y_true + y_pred_pos, axis=[0, -1, -2])\n",
        "\n",
        "    jac = (intersection + smooth) / (sum_ - intersection + smooth)\n",
        "\n",
        "    return K.mean(jac)\n",
        "\n",
        "\n",
        "def jaccard_coef_loss(y_true, y_pred):\n",
        "    return -K.log(jaccard_coef(y_true, y_pred)) + K.binary_crossentropy(y_pred, y_true)\n",
        "\n",
        "def select_plume_location(noise, plume_shape, nan_threshold = 0.1):\n",
        "    success = 0\n",
        "    while success == 0:\n",
        "        noise_shape = noise.shape\n",
        " #       print(noise)\n",
        "        coord_range = np.subtract(noise_shape,plume_shape)\n",
        "        x = np.random.randint(0,coord_range[0])\n",
        "        y = np.random.randint(0,coord_range[1])\n",
        "\n",
        "        #location selected now check to make sure chosen range is in area with data\n",
        "        selected_noise_area = noise[x:x+plume_shape[0],y:y+plume_shape[1]]\n",
        "        nan_sum = 0\n",
        "        nanmap = np.zeros(selected_noise_area.shape)\n",
        "        for j in range(plume_shape[0]):\n",
        "            for k in range(plume_shape[1]):\n",
        "                if np.isnan(selected_noise_area[j,k])==True:\n",
        "                    nan_sum = nan_sum+1\n",
        "                    nanmap[j,k]= 1\n",
        "        nan_fraction = nan_sum/(plume_shape[0]*plume_shape[1])\n",
        "        if nan_fraction<nan_threshold:\n",
        "            success = 1\n",
        "    return [x,y,nanmap]\n",
        "\n",
        "def get_background_images_single_file(noise,num_of_imgs):\n",
        "    out = np.zeros((num_of_imgs,128,128))\n",
        "    for j in range(num_of_imgs):\n",
        "        x,y,nanmap = select_plume_location(noise, plume_shape = (128,128), nan_threshold = 0.1)\n",
        "        out[j] = noise[x:x+128,y:y+128]\n",
        "    return out\n",
        "\n",
        "def get_background_images_multiple_files(directory, imgs_per_file, date):\n",
        "\n",
        "    counter = 0\n",
        "    for file in os.listdir(directory):\n",
        "        if \".npy\" in file:\n",
        "            counter = counter+1\n",
        "    out = np.zeros((counter*imgs_per_file,128,128))\n",
        "    counter = 0\n",
        "    for file in os.listdir(directory):\n",
        "        if \".npy\" in file:\n",
        "            print(counter)\n",
        "            noise_temp = np.load(directory+file)\n",
        "            out_temp = get_background_images_single_file(noise_temp, imgs_per_file)\n",
        "            out[counter*imgs_per_file:(counter*imgs_per_file)+imgs_per_file] = out_temp\n",
        "            counter = counter+1\n",
        "    np.save(path_hd+ 'background_c1_imgs_' +str(imgs_per_file)+'_per_file_'+str(date), out)\n",
        "    return out\n",
        "\n",
        "\n",
        "def conv2d_block(input_tensor, n_filters, kernel_size=3, batchnorm=True):\n",
        "    # first layer\n",
        "    x = Conv2D(filters=n_filters, kernel_size=(kernel_size, kernel_size), kernel_initializer=\"he_normal\",\n",
        "               padding=\"same\")(input_tensor)\n",
        "    if batchnorm:\n",
        "        x = BatchNormalization()(x)\n",
        "    x = Activation(\"relu\")(x)\n",
        "    # second layer\n",
        "    x = Conv2D(filters=n_filters, kernel_size=(kernel_size, kernel_size), kernel_initializer=\"he_normal\",\n",
        "               padding=\"same\")(x)\n",
        "    if batchnorm:\n",
        "        x = BatchNormalization()(x)\n",
        "    x = Activation(\"relu\")(x)\n",
        "    return x\n",
        "\n",
        "def get_unet(input_img, n_filters=16, dropout=0.5, batchnorm=True):\n",
        " #   contracting path\n",
        "    c1 = conv2d_block(input_img, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)\n",
        "    p1 = MaxPooling2D((2, 2)) (c1)\n",
        "    p1 = Dropout(dropout*0.5)(p1)\n",
        "\n",
        "    c2 = conv2d_block(p1, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)\n",
        "    p2 = MaxPooling2D((2, 2)) (c2)\n",
        "    p2 = Dropout(dropout)(p2)\n",
        "\n",
        "    c3 = conv2d_block(p2, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)\n",
        "    p3 = MaxPooling2D((2, 2)) (c3)\n",
        "    p3 = Dropout(dropout)(p3)\n",
        "\n",
        "    c4 = conv2d_block(p3, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)\n",
        "    p4 = MaxPooling2D(pool_size=(2, 2)) (c4)\n",
        "    p4 = Dropout(dropout)(p4)\n",
        "\n",
        "    c5 = conv2d_block(p4, n_filters=n_filters*16, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    # expansive path\n",
        "    u6 = Conv2DTranspose(n_filters*8, (3, 3), strides=(2, 2), padding='same') (c5)\n",
        "    u6 = concatenate([u6, c4])\n",
        "    u6 = Dropout(dropout)(u6)\n",
        "    c6 = conv2d_block(u6, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    u7 = Conv2DTranspose(n_filters*4, (3, 3), strides=(2, 2), padding='same') (c6)\n",
        "    u7 = concatenate([u7, c3])\n",
        "    u7 = Dropout(dropout)(u7)\n",
        "    c7 = conv2d_block(u7, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    u8 = Conv2DTranspose(n_filters*2, (3, 3), strides=(2, 2), padding='same') (c7)\n",
        "    u8 = concatenate([u8, c2])\n",
        "    u8 = Dropout(dropout)(u8)\n",
        "    c8 = conv2d_block(u8, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    u9 = Conv2DTranspose(n_filters*1, (3, 3), strides=(2, 2), padding='same') (c8)\n",
        "    u9 = concatenate([u9, c1], axis=3)\n",
        "    u9 = Dropout(dropout)(u9)\n",
        "    c9 = conv2d_block(u9, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)\n",
        "    model = Model(inputs=[input_img], outputs=[outputs])\n",
        "    return model\n",
        "\n",
        "def random_plume_rotation(plumes):\n",
        "    out = np.zeros(plumes.shape)\n",
        "    print(out.shape)\n",
        "    for j in range(len(plumes)):\n",
        "        out[j] = plumes[j]\n",
        "        rand = np.random.randint(1,5)\n",
        "        plume_rot = np.rot90(plumes[j,0,:,:],rand)\n",
        "        out[j,0,:,:] = plume_rot\n",
        "        if rand==1:\n",
        "            out[j,1,:,:] = -plumes[j,2,:,:]\n",
        "            out[j,2,:,:] = plumes[j,1,:,:]\n",
        "        if rand==2:\n",
        "            out[j,1,:,:] = -plumes[j,1,:,:]\n",
        "            out[j,2,:,:] = -plumes[j,2,:,:]\n",
        "        if rand==3:\n",
        "            out[j,1,:,:] = plumes[j,2,:,:]\n",
        "            out[j,2,:,:] = -plumes[j,1,:,:]\n",
        "    return out\n",
        "\n",
        "def get_corr_factor(rate_kg_h, rate_sim = 12):\n",
        "#    rate_sim = 12 #units/s #take as mols/s\n",
        "    molar_mass = 0.016043 #kg/mol of methane\n",
        "    rate_mols_s = rate_kg_h/molar_mass/3600 #mols/s\n",
        "    corr_factor = rate_mols_s/rate_sim\n",
        "    return corr_factor\n",
        "\n",
        "\n",
        "def get_full_imgs(back,plumes,sr_range):\n",
        "    out = np.zeros(back.shape)\n",
        "    sr = np.random.uniform(sr_range[0],sr_range[1],len(back))\n",
        "    for j in range(len(back)):\n",
        "        c_fac = get_corr_factor(sr[j])\n",
        "        plume = plumes[j]/(25**2)*c_fac\n",
        "#        out_masks = plume\n",
        "        out[j] = plume+back[j]\n",
        "        out[j] = np.nan_to_num(out[j],nan=0)\n",
        "    return out, sr #out_masks\n",
        "\n",
        "def get_full_imgs_with_winds(back,plumes,sr_range):\n",
        "    out = np.zeros(plumes.shape)\n",
        "    print(out.shape)\n",
        "\n",
        "    sr = np.random.uniform(sr_range[0],sr_range[1],len(back))\n",
        "\n",
        "    out[:,1,:,:] = plumes[:,1,:,:]\n",
        "    out[:,2,:,:] = plumes[:,2,:,:]\n",
        "    for j in range(len(back)):\n",
        "        c_fac = get_corr_factor(sr[j])\n",
        "        plume = plumes[j,0,:,:]/(25**2)*c_fac\n",
        "#        out_masks = plume\n",
        "        out[j,0,:,:] = plume+back[j]\n",
        "        out[j,0,:,:] = np.nan_to_num(out[j,0,:,:],nan=0)\n",
        "    return out, sr #out_masks\n",
        "\n",
        "def get_sr_cnn_deep(input_img, n_filters=16, dropout=0.5, batchnorm = True):\n",
        "    c1 = conv2d_block(input_img, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)\n",
        "    p1 = MaxPooling2D((2, 2)) (c1)\n",
        "    p1 = Dropout(dropout*0.5)(p1)\n",
        "\n",
        "    c2 = conv2d_block(p1, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)\n",
        "    p2 = MaxPooling2D((2, 2)) (c2)\n",
        "    p2 = Dropout(dropout)(p2)\n",
        "\n",
        "    c3 = conv2d_block(p2, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)\n",
        "    p3 = MaxPooling2D((2, 2)) (c3)\n",
        "    p3 = Dropout(dropout)(p3)\n",
        "\n",
        "    c4 = conv2d_block(p3, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)\n",
        "    p4 = MaxPooling2D(pool_size=(2, 2)) (c4)\n",
        "    p4 = Dropout(dropout)(p4)\n",
        "\n",
        "    c5 = conv2d_block(p4, n_filters=n_filters*16, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    f1 = Flatten()(c5)\n",
        "    d1 = Dense(32, activation = 'relu')(f1)\n",
        "    d1 = Dense(16, activation = 'relu')(d1)\n",
        "\n",
        "    outputs = Dense(1, activation = 'relu')(d1)\n",
        "    model = Model(inputs=[input_img], outputs=[outputs])\n",
        "    return model\n",
        "\n",
        "def get_detector_cnn_deep(input_img, n_filters=16, dropout=0.5, batchnorm = True):\n",
        "    c1 = conv2d_block(input_img, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)\n",
        "    p1 = MaxPooling2D((2, 2)) (c1)\n",
        "    p1 = Dropout(dropout*0.5)(p1)\n",
        "\n",
        "    c2 = conv2d_block(p1, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)\n",
        "    p2 = MaxPooling2D((2, 2)) (c2)\n",
        "    p2 = Dropout(dropout)(p2)\n",
        "\n",
        "    c3 = conv2d_block(p2, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)\n",
        "    p3 = MaxPooling2D((2, 2)) (c3)\n",
        "    p3 = Dropout(dropout)(p3)\n",
        "\n",
        "    c4 = conv2d_block(p3, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)\n",
        "    p4 = MaxPooling2D(pool_size=(2, 2)) (c4)\n",
        "    p4 = Dropout(dropout)(p4)\n",
        "\n",
        "    c5 = conv2d_block(p4, n_filters=n_filters*16, kernel_size=3, batchnorm=batchnorm)\n",
        "\n",
        "    f1 = Flatten()(c5)\n",
        "    d1 = Dense(32, activation = 'relu')(f1)\n",
        "    d1 = Dense(16, activation = 'relu')(d1)\n",
        "\n",
        "    outputs = Dense(1, activation = 'sigmoid')(d1)\n",
        "    model = Model(inputs=[input_img], outputs=[outputs])\n",
        "    return model\n",
        "\n",
        "vert_bounds = (80,150)\n",
        "hor_bounds = (200,230)\n",
        "def rotate_winds(u,v,rot_deg):\n",
        "    angle = np.arctan2(v,u)\n",
        "    rot_rad = rot_deg*np.pi/180\n",
        "    if angle<0:\n",
        "        angle = 2*np.pi+angle\n",
        "\n",
        "    mag = (u**2+v**2)**(1/2)\n",
        "    u_new = np.cos(angle+rot_rad)*mag\n",
        "\n",
        "    v_new = np.sin(angle+rot_rad)*mag\n",
        "\n",
        "    return([u_new,v_new])\n",
        "\n",
        "\n",
        "def get_plumes(plume_array, num_of_plumes):\n",
        "    plume_set = np.asarray(random.choices(plume_array, k = num_of_plumes))\n",
        "    print(plume_set.shape)\n",
        "    out = np.zeros((len(plume_set),3,128,128))\n",
        "    for j in range(len(plume_set)):\n",
        "        vert_start = np.random.randint(vert_bounds[0],vert_bounds[1])\n",
        "        vert_end = vert_start+128\n",
        "        hor_start = np.random.randint(hor_bounds[0],hor_bounds[1])\n",
        "        hor_end = hor_start+128\n",
        "#        print(vert_start,vert_end, hor_start, hor_end)\n",
        "#        print(plume_set[j,0,vert_start:vert_end, hor_start:hor_end].shape)\n",
        "        out[j,0,:,:] = plume_set[j,0,vert_start:vert_end, hor_start:hor_end]\n",
        "        rot_deg = np.random.randint(1,360)\n",
        "        out[j,0,:,:] = sp.ndimage.rotate(out[j,0,:,:],rot_deg, axes=(0,1), reshape=False)\n",
        "        winds = rotate_winds(plume_set[j,1,0,0], plume_set[j,2,0,0], rot_deg)\n",
        "\n",
        "        out[j,1,:,:] = winds[0]\n",
        "        out[j,2,:,:] = winds[1]\n",
        "    return(out)\n",
        "\n",
        "def jac_simple(y_pred,y_true):\n",
        "    y_pred = y_pred>0.5\n",
        "    intersection = np.sum(y_pred & y_true, (1,2))\n",
        "    unity = np.sum(y_pred | y_true, (1,2))\n",
        "    jac = intersection/unity\n",
        "    return jac\n",
        "\n",
        "def jac_simple_weighted(y_pred,y_true):\n",
        "    y_pred = y_pred>0.5\n",
        "    intersection = np.sum(y_pred & y_true, (1,2))\n",
        "    unity = np.sum(y_pred | y_true, (1,2))\n",
        "    plume\n",
        "\n",
        "    jac = intersection/unity\n",
        "    return jac\n",
        "\n",
        "def get_enhancement_captured(plume_original, pred_mask):\n",
        "    total_enhancement = np.sum(plume_original, (1,2))\n",
        "\n",
        "    pred_enh = np.sum(plume_original*pred_mask, (1,2))\n",
        "\n",
        "    fraction_total_enhancement = pred_enh/total_enhancement\n",
        "    return fraction_total_enhancement\n",
        "\n",
        "#IME Funcs\n",
        "m_a = 0.00289647 #mass of dry air kg/mol\n",
        "m_ch4 = 0.01614 #mass of methane kg/mol\n",
        "omega_a = 1.03*(10**4) #column mass of dry air kg/m2\n",
        "\n",
        "\n",
        "def get_IME(img,mask, a = 25**2):\n",
        "    not_mask = np.logical_not(mask)\n",
        "    back_img = img*not_mask\n",
        "    back_vals = back_img[back_img != False]\n",
        "    back_vals = back_vals[back_vals != np.nan]\n",
        "\n",
        "#    print(back_vals)\n",
        "    background = np.median(back_vals)\n",
        "    #testing\n",
        "#    background = np.percentile(back_vals,80)\n",
        "#    background = 0\n",
        "\n",
        "    #testing\n",
        "    delta_x = (img - background)*mask\n",
        " #   print(np.sum(delta_x))\n",
        "    IME = np.sum(delta_x*m_ch4*a)\n",
        "    return IME\n",
        "\n",
        "def get_ueff(u10,v10):\n",
        "    #This is the approach use by varon et al but may need to recalibrate\n",
        "    u10 = np.mean(u10)\n",
        "    v10 = np.mean(v10)\n",
        "    u = (u10**2+v10**2)**(1/2)\n",
        "    ueff = np.log(u)+0.5\n",
        "#    print('ueff = '+str(ueff))\n",
        "    return ueff\n",
        "\n",
        "def get_source_rate(img,mask,u10,v10, a = 25**2):\n",
        "    L = (np.sum(mask)*a)**(1/2)\n",
        "    IME = get_IME(img,mask,a)\n",
        "    u_eff = get_ueff(u10,v10)\n",
        "    Q = u_eff/L*IME/(10**3)*3600 #t/h\n",
        "#    print('Q = '+str(Q))\n",
        "    return Q\n",
        "\n",
        "def get_source_rate_u10(img,mask,U10,ueff_slope = 0.18801551, ueff_int = 0.8801000582013674, a = 25**2):\n",
        "    L = (np.sum(mask)*a)**(1/2)\n",
        "    IME = get_IME(img,mask,a)\n",
        "#    u_eff = np.log(U10)+0.5\n",
        "    u_eff = ueff_slope*U10+ueff_int\n",
        "#    u_eff =1\n",
        "#    print(u_eff,L, IME)\n",
        "    Q = u_eff/L*IME/(10**3)*3600 #t/h\n",
        "#    print('Q = '+str(Q))\n",
        "    return Q\n",
        "\n",
        "def ueff_calibration(img,mask, Q,a = 25**2):\n",
        "    Q = Q/3600\n",
        "#    print(Q)\n",
        "    L = (np.sum(mask)*a)**(1/2)\n",
        "    IME = get_IME(img,mask,a)\n",
        "#    print(IME)\n",
        "\n",
        "    ueff = Q*L/IME\n",
        "    return ueff\n",
        "\n",
        "def get_ime_out(x_10, y_10):\n",
        "    IME = np.zeros(len(x_10))\n",
        "    u10 = np.zeros(len(x_10))\n",
        "\n",
        "    for j in range(len(x_10)):\n",
        "        IME[j] = get_IME(x_10[j,:,:,0],x_10[j,:,:,2]>0.5)\n",
        "        u10[j] = np.mean(x_10[j,:,:,1])\n",
        "\n",
        "    non_zero_winds_loc = np.where(u10>0)\n",
        "    IME = IME[non_zero_winds_loc]\n",
        "    u10 = u10[non_zero_winds_loc]\n",
        "    x_10 = x_10[non_zero_winds_loc]\n",
        "    y_10 = y_10[non_zero_winds_loc]\n",
        "\n",
        "    sr = np.zeros(len(x_10))\n",
        "\n",
        "    for j in range(len(x_10)):\n",
        "        sr[j] = get_source_rate_u10(x_10[j,:,:,0],x_10[j,:,:,2]>0.5, u10[j])\n",
        "\n",
        "    return sr*1000,y_10, non_zero_winds_loc\n",
        "\n"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "d78fc089",
      "metadata": {
        "id": "d78fc089"
      },
      "source": [
        "Okay so the first thing we need to do is import your data and get it formatted in a way that will play nice with our model.\n",
        "\n",
        "You will need two arrays of data:\n",
        "\n",
        "(1) Methane enhancement fields with even, equivalent horizontal and vertical dimensions (I recommend modest\n",
        "dimensions like 128 or 64 to start out to keep training time quick)\n",
        "\n",
        "(2) Binary mask arrays of the plume pixels in each image"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "112e5cf3",
      "metadata": {
        "id": "112e5cf3"
      },
      "outputs": [],
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "630f7271",
      "metadata": {
        "id": "630f7271"
      },
      "outputs": [],
      "source": [
        "#Once you have your formatted image and mask arrays the following line of code will randomly subset\n",
        "#90% of them for training and 10% for testing. This ratio can be changed by altering the test_size\n",
        "#parameter. The random_state command is set to 1 to allow for repeatability, but you can change\n",
        "#or remove that parameter to get other potential subsets\n",
        "\n",
        "\n",
        "x_train, x_test, y_train, y_test = train_test_split(imgs, masks, test_size = 0.1, random_state = 1)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "2580a07c",
      "metadata": {
        "id": "2580a07c"
      },
      "outputs": [],
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "3ab4e9bc",
      "metadata": {
        "id": "3ab4e9bc"
      },
      "outputs": [],
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "5603661b",
      "metadata": {
        "id": "5603661b"
      },
      "outputs": [],
      "source": []
    },
    {
      "cell_type": "markdown",
      "id": "7f16e72a",
      "metadata": {
        "id": "7f16e72a"
      },
      "source": [
        "Okay now we create and train the model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "1db7b93e",
      "metadata": {
        "id": "1db7b93e"
      },
      "outputs": [],
      "source": [
        "#Create model\n",
        "\n",
        "\n",
        "#These two values should be set to the dimensions of your enhancement fields\n",
        "#Altering these should be the only change needed in this cell\n",
        "\n",
        "im_height = 128\n",
        "im_width = 128\n",
        "border = 5\n",
        "\n",
        "input_img = Input((im_height, im_width,1), name='img')\n",
        "\n",
        "\n",
        "\n",
        "model_mask = get_unet(input_img, n_filters=16, dropout=0.05, batchnorm=True)\n",
        "\n",
        "model_mask.compile(optimizer=Adam(), loss=jaccard_coef_loss, metrics=[\"accuracy\",jaccard_coef])\n",
        "model_mask.summary()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "2082d3ea",
      "metadata": {
        "id": "2082d3ea"
      },
      "outputs": [],
      "source": [
        "#Now that the model is built, we need to train it\n",
        "#This cell will take a while to run depending on your training dimensions and image count\n",
        "#I have set the number of epoch's to 10, but you should be able to go up to about 20 without overfitting\n",
        "\n",
        "\n",
        "model_mask.fit(x_train, y_train, batch_size = 32, epochs = 10)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "1c51ff06",
      "metadata": {
        "id": "1c51ff06"
      },
      "outputs": [],
      "source": [
        "#This takes your model and applies it over the test dataset to see how well it performs on data it\n",
        "#Hasn't seen before\n",
        "\n",
        "\n",
        "model_mask.evaluate(x_test,y_test)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "a956f970",
      "metadata": {
        "id": "a956f970"
      },
      "outputs": [],
      "source": [
        "#You can save your trained model weights so you won't have to perform the training again\n",
        "\n",
        "model_mask.save(file_path+\"file_name\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "955641f0",
      "metadata": {
        "id": "955641f0"
      },
      "outputs": [],
      "source": [
        "#And here is how to load those back in\n",
        "\n",
        "model_mask.load_weights(file_path+\"file_name\")"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "3ec078b5",
      "metadata": {
        "id": "3ec078b5"
      },
      "source": [
        "With the model now trained, you can use it to predict masks for batches of images. Remember, they must be of the model's defined input dimension"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "0691fe1e",
      "metadata": {
        "id": "0691fe1e"
      },
      "outputs": [],
      "source": [
        "#This line will produce predicted masks for all of the images in imgs\n",
        "#Masks are not output as binary, but as confidence values between 0-1 for each pixel\n",
        "#Threshold the masks at >0.5 for binary masking\n",
        "#you can also use >0.75 or higher to be a little stricter\n",
        "\n",
        "pred_masks = model_mask.predict(imgs)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "be582e74",
      "metadata": {
        "id": "be582e74"
      },
      "outputs": [],
      "source": [
        "#If you want to take a look at your performance for individual images we can mask out\n",
        "#All the images in the test dataset, and compute their jaccard scores with the following\n",
        "\n",
        "pred_masks_test = model_mask.predict(x_test)\n",
        "\n",
        "jac_test = jac_simple(pred_masks_test[:,:,:,0]>0.5, y_test>0.5)\n",
        "\n",
        "#You can then plot this jac test against different image characteristics (such as background std)\n",
        "#To get an idea of how the model is performing in various conditions\n",
        "#As a general rule of thumb Jaccard scores above 0.3 are quite good"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.0"
    },
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}